### HW 8 solutions ### stirrett <- read.delim('ecol 563/Stirrett.txt') # create raw data trt1 <- rep(stirrett$Count, stirrett$Trt1) trt2 <- rep(stirrett$Count, stirrett$Trt2) trt3 <- rep(stirrett$Count, stirrett$Trt3) trt4 <- rep(stirrett$Count, stirrett$Trt4) # assemble results and add a treatment variable borer <- data.frame(count=c(trt1, trt2, trt3, trt4), treatment=rep(1:4, c(length(trt1), length(trt2), length(trt3), length(trt4)))) # add the two factors that define the treatments borer$early <- factor(ifelse(borer$treatment %in% c(3,4),'present','absent')) borer$late <- factor(ifelse(borer$treatment %in% c(2,4),'present','absent')) # fit interaction model library(MASS) out.NB1 <- glm.nb(count~early*late, data=borer # fit one-way design out.NB1a <- glm.nb(count~factor(treatment), data=borer) # two-factor interaction model and one-way design are equivalent AIC(out.NB1,out.NB1a) sapply(list(out.NB1,out.NB1a),logLik) # sequential analysis of deviance table anova(out.NB1) # refit main effects model out.NB2 <- glm.nb(count~early, data=borer) anova(out.NB2) summary(out.NB2) # mean count of early treatment is 40% that of no treatment exp(coef(out.NB1)[2]) #### goodness of fit ##### # create data set of counts junk<-data.frame(Count=0:26) # merge with tabulated data borer.table <- merge(stirrett, junk, all=T) # change missing values to zero temp <- sapply(2:5, function(x) ifelse(is.na(borer.table[,x]), 0, borer.table[,x])) # assign result to data frame borer.table[,2:5] <- temp borer.table # create the counts vector, stack the frequency columns, add a treatment column final.borer <- data.frame(count=rep(borer.table$Count,4), freq=unlist(borer.table[,2:5]), treatment=rep(1:4, each=nrow(borer.table))) dim(final.borer) # create the early and late factors final.borer$early <- factor(ifelse(final.borer$treatment %in% c(3,4), 'present', 'absent')) final.borer$late <- factor(ifelse(final.borer$treatment %in% c(2,4), 'present', 'absent')) # add predicted treatment mean to the data frame final.borer$mu <- predict(out.NB2, type='response', newdata=data.frame(early=final.borer$early)) # calculate negative binomial probabilities final.borer$p <- dnbinom(final.borer$count ,mu=final.borer$mu, size=out.NB2$theta) # probabilities do not sum to one tapply(final.borer$p, final.borer$treatment, sum) # add tail probability to P(X = 26) entry final.borer$p <- final.borer$p + ifelse(final.borer$count==26, 1-pnbinom(26,mu=final.borer$mu, size=out.NB2$theta), 0) tapply(final.borer$p,final.borer$treatment,sum) final.borer[1:12,] # count number of observations in each treatment tapply(final.borer$freq, final.borer$treatment, sum) n <- tapply(final.borer$freq, final.borer$treatment, sum) # calculate expected counts final.borer$exp <- final.borer$p*n[final.borer$treatment] # graphical goodness of fit library(lattice) xyplot(freq~count|early+late, data=final.borer, ylab='frequency', panel=function(x,y,subscripts) { panel.xyplot(x, y, type='h', lwd=8, col='grey',lineend=1) panel.xyplot(x, final.borer$exp[subscripts], pch=16, cex=.7, type='o', col=1)}) # improve labeling of the panels xyplot(freq~count|factor(early, levels=levels(early), labels=paste("Early = ", levels(early), sep='')) + factor(late, levels=levels(late), labels=paste("Late = ", levels(late),sep='')), data=final.borer, ylab='frequency', panel=function(x, y, subscripts) { panel.xyplot(x, y, type='h', lwd=8, col='grey',lineend=1) panel.xyplot(x, final.borer$exp[subscripts], pch=16, cex=.7, type='o', col=1)}) # place panels on top and on the left library(latticeExtra) xyplot(freq~count|factor(early, levels=levels(early), labels=paste("Early = ", levels(early),sep='')) + factor(late, levels=levels(late), labels=paste("Late = ", levels(late),sep='')), data=final.borer, ylab='frequency', panel=function(x, y, subscripts) { panel.xyplot(x, y, type='h', lwd=8, col='grey', lineend=1) panel.xyplot(x, final.borer$exp[subscripts], pch=16, cex=.7, type='o', col=1)}) -> mygraph useOuterStrips(mygraph) # parametric bootstrap goodness of fit borer.p <- split(final.borer$p, final.borer$treatmen) myfunc <- function() { out.obs <- as.vector(sapply(1:4, function(x) rmultinom(1, size=n[x], prob=borer.p[[x]]))) sum((out.obs-final.borer$exp)^2/final.borer$exp) } set.seed(100) sim.data <- replicate(9999, myfunc()) actual <- sum((final.borer$freq-final.borer$exp)^2/final.borer$exp) pearson <- c(sim.data, actual) pval <- sum(pearson>=actual)/length(pearson) pval ########## New material ############# ### goodness of fit for NB model with continuous predictors #### gala <- read.table('ecol 563/galapagos.txt', header=T) out.NB2 <- glm.nb(Species~log(Area), data=gala) # determine range of observed species counts range(gala$Species) # create probability vectors for each observation out.p <- lapply(1:29, function(x) dnbinom(0:475, mu=fitted(out.NB2)[x], size=out.NB2$theta)) # determine the largest observation for which probability exceeds 5e-6 sapply(1:29, function(x) max((0:475)[out.p[[x]]>5e-6])) # store this largest count as ux gala$ux <- sapply(1:29, function(x) max((0:475)[out.p[[x]]>5e-6])) # set lx to be zero gala$lx <- rep(0,29) # obtain maxiumum calculated probability for each island gala$uy <- sapply(out.p, max) # minimum probability gala$ly <- rep(0,29) # mean for each island gala$mu<-fitted(out.NB2) # observed probability according to the model gala$z <- dnbinom(gala$Species, mu=fitted(out.NB2), size=out.NB2$theta) # prepanel function to set limits for each graph prepanel.ci2 <- function(x, y, ly, lx, uy, ux, subscripts, ...) { list(ylim=range(y, uy[subscripts], ly[subscripts], finite=T), xlim=range(x, ux[subscripts], lx[subscripts], finite=T))} # plot distributions for each island xyplot(z~Species|Island, data=gala, ux=gala$ux, lx=gala$lx, uy=gala$uy, ly=gala$ly, prepanel=prepanel.ci2, panel=function(x,y,subscripts){ panel.xyplot(0:475, dnbinom(0:475, mu=gala$mu[subscripts], size=out.NB2$theta), type='h', col='grey') panel.abline(v=x, col=2, lty=2)}, scale=list(x='free', y='free')) #uppet-tailed p-values upper.p <- 1-pnbinom(gala$Species-1, mu=gala$mu, size=out.NB2$theta) # number of significant p-values sum(upper.p<.05) # number of significant results expected by chance .05*29 # lower-tailed p-values lower.p <- pnbinom(gala$Species, mu=gala$mu, size=out.NB2$theta) sum(lower.p<.05) lower.p